0: Preparation

Defining the input and output files

Loading libraries

library(knitr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2

Causative variant

Variant fixation

Fixation probability

Fixation time

# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
  # Find the index of the first row where the allele frequency is 0.9 or higher
  fixation_index <- which(data$allele_frequency >= threshold)[1]
  
  # Return the number of rows until fixation is reached
  return(fixation_index - 1)
}

Summary - Variant fixation

Selection_coefficient Fixation_probability Avg_Fixation_time Min_fixation_time Max_fixation_time
s=0.1 0.4 35.55 29 39
s=0.2 9.0 33.95 24 39
s=0.3 27.8 26.65 19 34
s=0.4 40.0 22.30 13 38
s=0.5 51.3 14.45 10 23
s=0.6 37.0 12.55 10 18
s=0.7 46.5 9.95 6 16
s=0.8 42.6 7.60 5 11

Causative variant windows

Variant position

Window lengths

Causative variant windows

Summary - Causative variant windows

Selection_coefficient Avg_Length_Mb Avg_window_freq Min_freq Max_freq Avg_freq_variant_100k_window
1 s=0.1 4.425 56.0 12.9 100 57.1
4 s=0.4 4.575 75.9 18.6 100 76.5
2 s=0.2 4.770 57.8 7.1 100 58.7
3 s=0.3 4.785 71.7 20.0 100 72.7
8 s=0.8 5.120 81.3 15.7 100 81.5
7 s=0.7 5.210 84.8 31.4 100 85.6
5 s=0.5 5.385 83.9 22.9 100 85.1
6 s=0.6 5.485 78.0 10.0 100 78.5

Standard Error Confidence interval function

Confidence level <=> konfidensgrad

# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
  
  # Calculate standard error
  n <- length(observed_data)
  standard_deviation <- sd(observed_data)
  standard_error <- standard_deviation / sqrt(n - 1)
  
  # Calculate confidence interval based on standard error

  alpha <- (1 - confidence_level) / 2
  margin_of_error <- qnorm(1 - alpha) * standard_error
  mean_estimate <- mean(observed_data)
  # Calculate the percentiles for the confidence interval
  confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
  confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
  
  # Return confidence interval
  return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}





# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#   
#   # Function to calculate the mean for each bootstrap sample
#   compute_bootstrap_mean_fun <- function(observed_data_urn) {
#     bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
#     bootstrap_estimate <- mean(bootstrap_dataset)
#     return(bootstrap_estimate)
#   }
#   
#   # Perform bootstrap resampling
#   bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#   
#   # Calculate the percentiles for the confidence interval
#   alpha <- (1 - confidence_level) / 2
#   confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
#   confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#   
#   # Return the confidence interval
#   return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }

1: ROH-Frequency

1.1 : Autosome ROH-frequencies

1.1.1 : Empirical - ROH frequency

1.1.2: Neutral Model - ROH frequency

1.1.3: Selection Model

1.1.4 Summary - ROH-hotspot threshold

## ROH-hotspot selection testing results://n
Model Avg_ROH_hotspot_threshold
Neutral 30.5
s=0.1 32.2
s=0.2 35.1
s=0.3 37.3
s=0.4 37.9
s=0.6 41.4
s=0.7 41.8
s=0.5 42.6
s=0.8 47.6
Empirical 75.0

1.2 ROH-hotspots - ROH Frequency and Length

2: Inbreeding coefficient

2.1 Empirical data

## Overall Average Avg_F_ROH for  german_shepherd : 0.2753012 //n

2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.25007 //n
## [1] "Bootstrap 95% Confidence Interval: [0.23473890934696, 0.265401133510183]"

2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for  selection_model_s01_chr3 : 0.2734885 //n[1] "Bootstrap 95% Confidence Interval: [0.259682877404132, 0.287294222595868]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s02_chr3 : 0.290046 //n[1] "Bootstrap 95% Confidence Interval: [0.268404249001561, 0.311687808141296]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s03_chr3 : 0.3161388 //n[1] "Bootstrap 95% Confidence Interval: [0.294101862625108, 0.338175708803464]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s04_chr3 : 0.3145064 //n[1] "Bootstrap 95% Confidence Interval: [0.286793720981368, 0.342219150447203]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s05_chr3 : 0.3394231 //n[1] "Bootstrap 95% Confidence Interval: [0.313208544370823, 0.36563759848632]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s06_chr3 : 0.348661 //n[1] "Bootstrap 95% Confidence Interval: [0.319210697417095, 0.378111345440048]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s07_chr3 : 0.3598673 //n[1] "Bootstrap 95% Confidence Interval: [0.337347371590621, 0.382387271266521]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s08_chr3 : 0.3911701 //n[1] "Bootstrap 95% Confidence Interval: [0.350246216491064, 0.432094012080364]"

2.4 F_ROH summary

Model F_ROH Lower_CI Upper_CI
Neutral 0.250 0.235 0.265
s=0.1 0.273 0.260 0.287
Empirical 0.275 NA NA
s=0.2 0.290 0.268 0.312
s=0.4 0.315 0.287 0.342
s=0.3 0.316 0.294 0.338
s=0.5 0.339 0.313 0.366
s=0.6 0.349 0.319 0.378
s=0.7 0.360 0.337 0.382
s=0.8 0.391 0.350 0.432

2.4.1 Visualizaing F_ROH

## Models where empirical F_ROH is within CI:
## [1] "s=0.1" "s=0.2"
## 
## Models where empirical F_ROH is outside CI:
## [1] "s=0.3"   "s=0.4"   "s=0.5"   "s=0.6"   "s=0.7"   "s=0.8"   "Neutral"

3: Expected Heterozygosity

3.1 Empirical data

3.2 Neutral Model

3.3 Selection Model

## Uncommented because change of analysis

3.4 Causative Variant Window

## Point estimate H_e across all 20 simulations for  s01_chr3 : 0.05929176 //n[1] "Bootstrap 95% Confidence Interval: [0.0433130104967301, 0.0752705108634813]"
## Point estimate H_e across all 20 simulations for  s02_chr3 : 0.05801723 //n[1] "Bootstrap 95% Confidence Interval: [0.0419902747617942, 0.0740441924354399]"
## Point estimate H_e across all 20 simulations for  s03_chr3 : 0.03775639 //n[1] "Bootstrap 95% Confidence Interval: [0.0220355491960428, 0.0534772391642509]"
## Point estimate H_e across all 20 simulations for  s04_chr3 : 0.03080788 //n[1] "Bootstrap 95% Confidence Interval: [0.0148546775692621, 0.0467610731950587]"
## Point estimate H_e across all 20 simulations for  s05_chr3 : 0.02331459 //n[1] "Bootstrap 95% Confidence Interval: [0.00885300142719239, 0.0377761805510335]"
## Point estimate H_e across all 20 simulations for  s06_chr3 : 0.03246187 //n[1] "Bootstrap 95% Confidence Interval: [0.0140181073288633, 0.0509056361685207]"
## Point estimate H_e across all 20 simulations for  s07_chr3 : 0.01976834 //n[1] "Bootstrap 95% Confidence Interval: [0.00543074078237546, 0.0341059426184369]"
## Point estimate H_e across all 20 simulations for  s08_chr3 : 0.02504322 //n[1] "Bootstrap 95% Confidence Interval: [0.0104102212736186, 0.0396762237506187]"

3.4 Genomewide 5th percentile comparison - Expected Heterozygosity Summary

Model H_e_5th_percentile Lower_CI Upper_CI
Neutral 0.12797 0.12039 0.13555
s08_chr3 0.12831 0.12031 0.13632
s01_chr3 0.12840 0.12246 0.13433
s02_chr3 0.13008 0.12140 0.13876
s04_chr3 0.13092 0.12240 0.13943
s03_chr3 0.13443 0.12843 0.14043
s06_chr3 0.13551 0.12673 0.14429
s07_chr3 0.13766 0.12616 0.14916
s05_chr3 0.14148 0.13333 0.14964
Empirical NA NA NA

4: Summary

4.0 General comparison

4.0.1 ROH-hotspot threshold comparison

## 
##  ROH-hotspot threshold comparison between the different datasets
Model Avg_ROH_hotspot_threshold
Neutral 30.5
s=0.1 32.2
s=0.2 35.1
s=0.3 37.3
s=0.4 37.9
s=0.6 41.4
s=0.7 41.8
s=0.5 42.6
s=0.8 47.6
Empirical 75.0

4.0.2 F_ROH comparison

Model F_ROH Lower_CI Upper_CI
Neutral 0.250 0.235 0.265
s=0.1 0.273 0.260 0.287
Empirical 0.275 NA NA
s=0.2 0.290 0.268 0.312
s=0.4 0.315 0.287 0.342
s=0.3 0.316 0.294 0.338
s=0.5 0.339 0.313 0.366
s=0.6 0.349 0.319 0.378
s=0.7 0.360 0.337 0.382
s=0.8 0.391 0.350 0.432

4.1: Hotspot comparison

4.1.1: Selection test (Sweep test)

## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.1203882"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 No 0.12799
Hotspot_chr3_window_3 No 0.13260
Hotspot_chr17_window_2 No 0.14866
Hotspot_chr3_window_2 No 0.15005
Hotspot_chr17_window_1 No 0.15685
Hotspot_chr19_window_1 No 0.17061
## [1] "ROH-hotspots under selection:"
Name Under_selection Window_based_Average_H_e

4.1.2: Selection Strength Test (Sweep test)

4.1.1.1 Extracting Causative windows under selection

Model H_e Lower_CI Upper_CI Under_Selection
s=0.7 0.01977 0.00543 0.03411 Yes
s=0.5 0.02331 0.00885 0.03778 Yes
s=0.8 0.02504 0.01041 0.03968 Yes
s=0.4 0.03081 0.01485 0.04676 Yes
s=0.6 0.03246 0.01402 0.05091 Yes
s=0.3 0.03776 0.02204 0.05348 Yes
s=0.2 0.05802 0.04199 0.07404 Yes
s=0.1 0.05929 0.04331 0.07527 Yes
Neutral 0.12797 0.12039 0.13555 No

4.1.1.1 Extracting Hotspots under selection

*** Behöver bootstrap av 5th percentiles från neutral model

Hotspot - Causative Window - Comparison

Model Lengths_Mb Avg_ROH_Freq
Hotspot_chr3_window_1 10.900 81.3
s=0.6 5.485 78.0
s=0.5 5.385 83.9
s=0.7 5.210 84.8
s=0.8 5.120 81.3
s=0.3 4.785 71.7
s=0.2 4.770 57.8
s=0.4 4.575 75.9
s=0.1 4.425 56.0
Hotspot_chr19_window_1 4.400 75.6
Hotspot_chr3_window_2 3.200 76.3
Hotspot_chr3_window_3 2.700 77.6
Hotspot_chr17_window_1 2.000 76.4
Hotspot_chr17_window_2 0.600 76.1

Visualizing and scaling

5 Visualizing Expected Heterozygoisty

5.1 Bootstrap CI for mean 5th percentile H_e

## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"

5.2 5th percentile of the extreme H_e values